package com.lhb.test.tika;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URL;
import java.util.Collections;
import java.util.Set;

import org.apache.tika.Tika;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.parser.epub.EpubParser;
import org.apache.tika.parser.html.HtmlParser;
import org.apache.tika.parser.iwork.IWorkPackageParser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.OldExcelParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.parser.odf.OpenDocumentParser;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.parser.rtf.RTFParser;
import org.apache.tika.parser.txt.TXTParser;
import org.apache.tika.parser.xml.DcXMLParser;
/**
 * Small command-line demo that extracts plain text with the Apache Tika facade.
 *
 * <p>Usage: {@code TikaUtil [filePath [url]]}. Both arguments are optional;
 * when one is omitted the built-in default for that position is used.
 */
public class TikaUtil {
	/** Media types passed to {@code ParserDecorator.withoutTypes} for the OOXML parser. */
	private static final Set<MediaType> EXCLUDES = Collections
			.singleton(MediaType.application("x-tika-ooxml"));

	/** Explicit parser list used to build {@link #PARSER_INSTANCE}. */
	private static final Parser[] PARSERS = { new HtmlParser(),
			new RTFParser(), new PDFParser(), new TXTParser(),
			new OfficeParser(), new OldExcelParser(),
			ParserDecorator.withoutTypes(new OOXMLParser(), EXCLUDES),
			new OpenDocumentParser(), new IWorkPackageParser(),
			new DcXMLParser(), new EpubParser() };

	/** Auto-detecting parser limited to {@link #PARSERS}; not used by {@link #TIKA_INSTANCE}. */
	private static final AutoDetectParser PARSER_INSTANCE = new AutoDetectParser(
			PARSERS);

	/*
	 * The facade below is built with Tika's default configuration. To restrict
	 * it to the parsers listed above, construct it as
	 * new Tika(PARSER_INSTANCE.getDetector(), PARSER_INSTANCE) instead.
	 */
	private static final Tika TIKA_INSTANCE = new Tika();

	/** File parsed when no first argument is supplied. */
	private static final String DEFAULT_FILE_PATH = "D:\\我的文档\\财政平台\\文档\\oss_apidoc.zip";

	/** URL parsed when no second argument is supplied. */
	private static final String DEFAULT_URL = "http://127.0.0.1:9100/";

	/**
	 * Parses a local file and then a URL, printing the extracted text of each
	 * to standard output.
	 *
	 * @param args optional: {@code args[0]} is the path of the file to parse,
	 *             {@code args[1]} is the URL to parse
	 * @throws Exception if the file cannot be opened, the URL is malformed, or
	 *                   parsing fails
	 */
	public static void main(String[] args) throws Exception {
		String filePath = argOrDefault(args, 0, DEFAULT_FILE_PATH);
		String url = argOrDefault(args, 1, DEFAULT_URL);

		String result;
		// try-with-resources closes the stream however parsing terminates,
		// instead of relying on the callee to do it.
		try (InputStream is = new FileInputStream(new File(filePath))) {
			result = TIKA_INSTANCE.parseToString(is);
		}
		System.out.println("..................");
		System.out.println(result);
		System.out.println("22..................");
		result = TIKA_INSTANCE.parseToString(new URL(url));
		System.out.println(result);
	}

	/** Returns {@code args[index]} when present, otherwise {@code fallback}. */
	private static String argOrDefault(String[] args, int index, String fallback) {
		return (args != null && args.length > index) ? args[index] : fallback;
	}
}
